// ANCILLARY SERVICES REGRESSIONS

clear all 
set more off
set matsize 8000
cd "${PATH}/data"

**********************
save "data15min_cluster_TSO.dta", emptyok replace



use "data_15min", clear

// total RES (wind + solar) production
gen renewables = solar_gen + wind_gen // note wind_gen includes both onshore and offshore production


// ANCILLARY SERVICE COSTS
replace scr_op_pos = abs(scr_op_pos) if scr_op_pos < 0 
// one error in data 

// Define total AS COSTS
gen ancillary_costs_1 = price_reBAP * (scr_op_pos - scr_op_neg + ///
	mr_op_pos - mr_op_neg)
// see: https://www.regelleistung.net/ext/download/REBAP_MODELL_20200701
	
su ancillary_costs_1, d 


egen TSO_id = group(TSO)
gen hour = hh(time_utc)
gen day_of_week = dow(dofc(time_utc))
gen month = month(dofc(time_utc))
gen year = year(dofc(time_utc))
egen hour_month = group(hour month)
egen hour_month_day_of_week = group(hour month day_of_week)	
gen date = dofc(time_utc)
format date %td


xtset TSO_id time_utc
*************************
tempfile temp1 temp2
save `temp1'
*************



* (1) Build load clusters 
collapse load , by(TSO date hour)

// Clustering (k means) + cubic functional form. // 
egen date_indic = group(TSO date)
egen load_max = max(load), by(date_indic)

reshape wide load, i(date_indic) j(hour) 
save `temp2'

foreach t in "50Hertz" "Amprion" "TenneT" "TransnetBW" {

	use  `temp2', clear
	keep if TSO == "`t'"

	// Robustness for number of clusters and starting value 31787 (OK)
	cluster k load* load_max, k(3) name(kload_`t') s(kr(31787)) mea(abs) keepcen

	append using "data15min_cluster_TSO.dta"
	save "data15min_cluster_TSO.dta", replace
}

egen kload = rowtotal(kload_*)
drop if kload == 0
drop kload_*
tab kload TSO

reshape long load, i(date_indic) j(hour)


// SWAP LABELS FOR MEDIUM AND LOW DEMAND FOR 50Hertz SO THAT MARKERS ARE CONSISTENT ACROSS GRAPHS
gen kload_temp = 3 if kload == 2 & TSO == "50Hertz"
replace kload_temp = 2 if kload == 3 & TSO == "50Hertz"
replace kload_temp = 1 if kload == 1 & TSO == "50Hertz"
replace kload = kload_temp if TSO == "50Hertz"
drop kload_temp

save "data15min_cluster_TSO.dta", replace




// FIGURE B.1, ONLINE APPENDIX // 

use "data15min_cluster_TSO.dta", clear
replace load = load / 1e3  // THIS TRANSFORMS IN GWH

collapse (mean) load = load ///
	(sd) SD = load, by(TSO hour kload)

gen lb = load - SD
gen ub = load + SD

local j = 1
foreach t in "50Hertz" "Amprion" "TenneT" "TransnetBW" {
	
	preserve
	keep if TSO == "`t'"
	
// 	SAME RANGE FOR VERTICAL AXIS:
/*
	tw scatter load hour if kload == 1, ms(o) msize(small) xlabel(0(5)24) ylabel(5000(12000)30000) || rcap lb ub hour if kload == 1 || ///
		scatter load hour if kload == 2, ms(th) msize(small) xlabel(0(5)24) ylabel(5000(12000)30000) || rcap lb ub hour if kload == 2 ||  ///
		scatter load hour if kload == 3, ms(oh) msize(small) xlabel(0(5)24) ylabel(5000(12000)30000) || rcap lb ub hour if kload == 3	, /// 
		legend(off) ///
		title("`t'") ytitle("GWh") xtitle("hour") ylabel(, grid)	///
		graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color) ///
		name(graph_`t', replace)
	
	// 		legend(lab(1 "cluster 1") lab(2 " ") lab(3 "cluster 2") lab(4 " ") lab(5 "cluster 3") lab(6 " ") ring(0) position(11) size(vsmall)) ///
*/

// 	DIFFERENT RANGE FOR VERTICAL AXIS

if (`j' != 4) { 
	tw scatter load hour if kload == 1, ms(o) || rcap lb ub hour if kload == 1, lcolor(green) || ///
		scatter load hour if kload == 2, ms(th) || rcap lb ub hour if kload == 2, lcolor(blue) ||  ///
		scatter load hour if kload == 3, ms (oh) || rcap lb ub hour if kload == 3, lcolor(gray) ///
		legend(off) ///
		ytitle("GWh") xtitle("hour") ylabel(, grid)	title("`t'")		///
		graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color) ///
		name(graph_`t', replace)
}

else {
		tw scatter load hour if kload == 1, ms(o) || rcap lb ub hour if kload == 1, lcolor(green) || ///
		scatter load hour if kload == 2, ms(th) || rcap lb ub hour if kload == 2, lcolor(blue) ||  ///
		scatter load hour if kload == 3, ms (oh) || rcap lb ub hour if kload == 3, lcolor(gray) ///
		legend(label(1 "High demand") label(3 "Low demand") label(5 "Medium demand") order(1 5 3) rows(1)) ///
		ytitle("GWh") xtitle("hour") ylabel(, grid)	title("`t'")		///
		graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color) ///
		name(graph_`t', replace)
}


	restore	
	local j = `j' + 1
}
 
//graph combine graph_50Hertz graph_Amprion graph_TenneT graph_TransnetBW, scheme(s1color)
grc1leg graph_50Hertz graph_Amprion graph_TenneT graph_TransnetBW, leg(graph_TransnetBW) span scheme(s1color)

graph export "../figures/clusters_by_TSO.pdf", replace





* (2) Use original 15min data + add information on cluster
use `temp1' , clear

merge m:1 TSO date hour using "data15min_cluster_TSO.dta", keepusing(kload)
keep if _merge == 3
drop _merge


// KEEP ONLY OBSERVATIONS WITH POSITIVE OUTPUT
keep if solar_gen > 0				

** CUBIC 
gen solar2 = solar_gen^2
gen solar3 = solar_gen^3
gen load2 = load^2
gen load3 = load^3 
gen solar_load = solar_gen * load
gen solar_load2 = solar_gen * load^2
gen solar2_load = solar_gen^2 * load 


// EITHER ESTIMATE TOGETHER OR ALLOW FOR DIFFERENT COEFFICIENTS BY TSO-CLUSTER:

estimates clear
// CUBIC POLYNOMIAL
forvalues c = 1 / 3 {
	eststo: areg ancillary_costs_1 solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load /// 
		i.TSO_id#i.hour i.TSO_id#i.day_of_week i.TSO_id#i.month i.TSO_id#i.year /// 
		i.hour#i.day_of_week i.hour#i.year ///
		i.day_of_week#i.month i.day_of_week#i.year if kload == `c', ///
		a(hour_month)	
}	


// BY CLUSTER AND TSO: (WE SPLIT THE TABLE SO THAT IT FITS IN LATEX)

estimates clear
// CUBIC POLYNOMIAL
foreach t in "50Hertz" "Amprion" {
	forvalues c = 1 / 3 {
		eststo: areg ancillary_costs_1 solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load /// 
			i.hour#i.day_of_week i.hour#i.year ///
			i.day_of_week#i.month i.day_of_week#i.year if kload == `c' & TSO == "`t'", ///
			a(hour_month)	
	}
}	

esttab est* , se r2 keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load)
esttab using "../results/table_AS_clusters_v2_part1.tex", replace ///
	se keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load) scalars(r2 N)


estimates clear
// CUBIC POLYNOMIAL
foreach t in "TenneT" "TransnetBW" {
	forvalues c = 1 / 3 {
		eststo: areg ancillary_costs_1 solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load /// 
			i.hour#i.day_of_week i.hour#i.year ///
			i.day_of_week#i.month i.day_of_week#i.year if kload == `c' & TSO == "`t'", ///
			a(hour_month)	
	}
}	

esttab est* , se r2 keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load)
esttab using "../results/table_AS_clusters_v2_part2.tex", replace ///
	se keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load) scalars(r2 N)



// DERIVATIVES OF ABOVE REGRESSION MODEL WRT SOLAR
gen dAS_dR = 5.174 +  (-0.00223) * 2 * solar_gen + ///
	( -9.83e-09) * 3 * solar_gen^2  + (0.000380) * load + ///
	 ( -6.76e-08) * load^2 + ( 0.000000189) * 2 * solar_gen * load if kload == 1 & TSO == "50Hertz"
	
replace dAS_dR =  6.455 +  (-0.000391) * 2 * solar_gen + ///
	(-3.10e-08) * 3 * solar_gen^2  + ( 0.0000577) * load + ///
	 (-6.62e-08) * load^2 + (5.46e-08) * 2 * solar_gen * load if kload == 3 & TSO == "50Hertz"	
	
replace dAS_dR =   18.44 +   (-0.00560) * 2 * solar_gen + ///
	(0.000000512) * 3 * solar_gen^2  + ( -0.00260) * load + ///
	  0.000000159 * load^2 + (0.000000185) * 2 * solar_gen * load if kload == 2 & TSO == "50Hertz"			  

replace dAS_dR = 79.60 +  0.00101 * 2 * solar_gen + ///
	(0.000000101) * 3 * solar_gen^2  + (-0.00558) * load + ///
	 0.000000103 * load^2 + (-9.49e-08) * 2 * solar_gen * load if kload == 1 & TSO == "Amprion"
	
replace dAS_dR =  -20.40 +  (-0.000250) * 2 * solar_gen + ///
	(0.000000442) * 3 * solar_gen^2  + (0.00184) * load + ///
	 -2.90e-08 * load^2 + (-0.000000117) * 2 * solar_gen * load if kload == 2 & TSO == "Amprion"	
	
replace dAS_dR =  77.84 + 0.00388 * 2 * solar_gen + ///
	(-6.73e-09) * 3 * solar_gen^2  + (-0.00705) * load + ///
	  0.000000156 * load^2 + (-0.000000155) * 2 * solar_gen * load if kload == 3 & TSO == "Amprion"			  

replace dAS_dR = 380.1 + (-0.000817) * 2 * solar_gen + ///
	(1.43e-08) * 3 * solar_gen^2  + (-0.0347) * load + ///
	  0.000000788 * load^2 + (3.51e-08) * 2 * solar_gen * load if kload == 1 & TSO == "TenneT"
	
replace dAS_dR = -3.852 + 0.00105 * 2 * solar_gen + ///
	(-5.31e-09) * 3 * solar_gen^2  + (-0.0000909) * load + ///
	 1.98e-08 * load^2 + (-5.87e-08) * 2 * solar_gen * load if kload == 2 & TSO == "TenneT"	
	
replace dAS_dR = 42.15 + 0.00161 * 2 * solar_gen + ///
	(-1.91e-08) * 3 * solar_gen^2  + (-0.00512) * load + ///
	  0.000000148 * load^2 + (-6.88e-08) * 2 * solar_gen * load if kload == 3 & TSO == "TenneT"			  

replace dAS_dR = 130.8 +  0.00561 * 2 * solar_gen + ///
	(-1.23e-08) * 3 * solar_gen^2  + (-0.0329) * load + ///
	 0.00000202 * load^2 + (-0.000000616) * 2 * solar_gen * load if kload == 1 & TSO == "TransnetBW"
	
replace dAS_dR = 40.25 + 0.0160 * 2 * solar_gen + ///
	(0.00000132) * 3 * solar_gen^2  + (-0.0217) * load + ///
	 0.00000274 * load^2 + (-0.00000375) * 2 * solar_gen * load if kload == 2 & TSO == "TransnetBW"	
	
replace dAS_dR = 105.7 + 0.0130 * 2 * solar_gen + ///
	(-0.000000160) * 3 * solar_gen^2  + (-0.0314) * load + ///
	  0.00000223 * load^2 + (-0.00000150) * 2 * solar_gen * load if kload == 3 & TSO == "TransnetBW"			  

	  
	  
// THIS IS FOR TABLE OF SUM STATS OF PARTIAL AS / PARTIAL R	  
// FROM LOWEST TO HIGHEST DEMAND
di " "
di "Cluster = 2"
tabstat dAS_dR if kload == 2, by(TSO) s(mean sd N)
di " "
di "Cluster = 3"
tabstat dAS_dR if kload == 3, by(TSO) s(mean sd N)
di " "
di "Cluster = 1"
tabstat dAS_dR if kload == 1, by(TSO) s(mean sd N)

// THIS GOES INTO THE EXCEL TABLE WITH THE RESULTS
tabstat dAS_dR, by(TSO) s(mean sd N )




// PLOT FUNCTIONS 
local mean_load_50Hertz_1 11761.13
local mean_load_Amprion_1 25681.68 
local mean_load_TenneT_1 21414.69
local mean_load_TransnetBW_1 8621.016

local mean_load_50Hertz_2  8106.496
local mean_load_Amprion_2 18660.84
local mean_load_TenneT_2 14905.77
local mean_load_TransnetBW_2 5438.981

local mean_load_50Hertz_3 10227.77
local mean_load_Amprion_3 23104.54
local mean_load_TenneT_3 18848.91
local mean_load_TransnetBW_3 6696.521

// 90th percentile of solar_gen is 4538

tw function y = 5.174 +  (-0.00223) * 2 * x + ///
	( -9.83e-09) * 3 * x^2  + (0.000380) * `mean_load_50Hertz_1' + ///
	 ( -6.76e-08) * `mean_load_50Hertz_1'^2 + ( 0.000000189) * 2 * x * `mean_load_50Hertz_1', range(0 4538) lw(medthick) col(navy) || ///
	 function y = 6.455 +  (-0.000391) * 2 * x + ///
	(-3.10e-08) * 3 * x^2  + (0.0000577) * `mean_load_50Hertz_2' + ///
	 (-6.62e-08) * `mean_load_50Hertz_2'^2 + (5.46e-08) * 2 * x * `mean_load_50Hertz_2', range(0 4538) lw(medthick) lp(medthick) col(navy) || ///
	function y = 18.44 +   (-0.00560) * 2 * x + ///
	(0.000000512) * 3 * x^2  + (-0.00260) * `mean_load_50Hertz_3' + ///
	  0.000000159 * `mean_load_50Hertz_3'^2 + (0.000000185) * 2 * x * `mean_load_50Hertz_3', range(0 4538) lw(medthick) lp(medthick) col(navy) ///
	legend(off) ytitle("{&part}AS / {&part}S ({c 0128} / MWh)") ylabel(#2, grid) xlabel(#3) xtitle("Solar output (MWh)")	title("50 Hertz") ///
	graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color) name(ratios, replace)
graph save g1, replace
	  
	  
tw function y = 79.60 +  0.00101 * 2 * x + ///
	(0.000000101) * 3 * x^2  + (-0.00558) * `mean_load_Amprion_1' + ///
	 0.000000103 * `mean_load_Amprion_1'^2 + (-9.49e-08) * 2 * x * `mean_load_Amprion_1', range(0 4538) lw(medthick) lp(--) col(ltblue) || ///
	 function y = -20.40 +  (-0.000250) * 2 * x + ///
	(0.000000442) * 3 * x^2  + (0.00184) * `mean_load_Amprion_2' + ///
	 -2.90e-08 * `mean_load_Amprion_2'^2 + (-0.000000117) * 2 * x * `mean_load_Amprion_2', range(0 4538) lw(medthick) lp(--) col(ltblue) || ///
	function y = 77.84 + 0.00388 * 2 * x + ///
	(-6.73e-09) * 3 * x^2  + (-0.00705) * `mean_load_Amprion_3' + ///
	  0.000000156 * `mean_load_Amprion_3'^2 + (-0.000000155) * 2 * x * `mean_load_Amprion_3', range(0 4538) lw(medthick) lp(--) col(ltblue)  ///
	legend(off) ytitle("{&part}AS / {&part}S ({c 0128} / MWh)") ylabel(#2, grid) xlabel(#3) xtitle("Solar output (MWh)")	title("Amprion") ///
	graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color) name(ratios, replace)
graph save g2, replace
	
		
tw function y = 380.1 + (-0.000817) * 2 * x + ///
	(1.43e-08) * 3 * x^2  + (-0.0347) * `mean_load_TenneT_1' + ///
	  0.000000788 * `mean_load_TenneT_1'^2 + (3.51e-08) * 2 * x * `mean_load_TenneT_1', range(0 4538) lw(medthick) lp(_) col(cyan) || ///
	 function y = -3.852 + 0.00105 * 2 * x + ///
	(-5.31e-09) * 3 * x^2  + (-0.0000909) * `mean_load_TenneT_2' + ///
	 1.98e-08 * `mean_load_TenneT_2'^2 + (-5.87e-08) * 2 * x * `mean_load_TenneT_2', range(0 4538) lw(medthick) lp(_) col(cyan) || ///
	function y = 42.15 + 0.00161 * 2 * x + ///
	(-1.91e-08) * 3 * x^2  + (-0.00512) * `mean_load_TenneT_3' + ///
	  0.000000148 * `mean_load_TenneT_3'^2 + (-6.88e-08) * 2 * x * `mean_load_TenneT_3', range(0 4538) lw(medthick) lp(_) col(cyan) ///
	legend(off) ytitle("{&part}AS / {&part}S ({c 0128} / MWh)") ylabel(#2, grid) xlabel(#3) xtitle("Solar output (MWh)")	title("TenneT") ///
	graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color) name(ratios, replace)
graph save g3, replace

	  
tw function y = 130.8 +  0.00561 * 2 * x + ///
	(-1.23e-08) * 3 * x^2  + (-0.0329) * `mean_load_TransnetBW_1' + ///
	 0.00000202 * `mean_load_TransnetBW_1'^2 + (-0.000000616) * 2 * x * `mean_load_TransnetBW_1', range(0 4500) lw(medthick) lp(.-) col(black) || ///
	 function y = 40.25 + 0.0160 * 2 * x + ///
	(0.00000132) * 3 * x^2  + (-0.0217) * `mean_load_TransnetBW_2' + ///
	 0.00000274 * `mean_load_TransnetBW_2'^2 + (-0.00000375) * 2 * x * `mean_load_TransnetBW_2', range(0 4500) lw(medthick) lp(.-) col(black) || ///
	 function y = 105.7 + 0.0130 * 2 * x + ///
	(-0.000000160) * 3 * x^2  + (-0.0314) * `mean_load_TransnetBW_3' + ///
	  0.00000223 * `mean_load_TransnetBW_3'^2 + (-0.00000150) * 2 * x * `mean_load_TransnetBW_3', range(0 4500) lw(medthick) lp(.-) col(black) ///
	legend(off) ytitle("{&part}AS / {&part}S ({c 0128} / MWh)") ylabel(#2, grid) xlabel(#4) xtitle("Solar output (MWh)")	title("TransnetBW") ///
	graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color) name(ratios, replace)
graph save g4, replace 
 
 
gr combine g1.gph g2.gph g3.gph g4.gph, col(2) iscale(1) ///
	graphregion(lwidth(none) ilwidth(none)) plotregion(lwidth(none) ilwidth(none)) scheme(s1color) name(ratios, replace)
graph export "../figures/function_dAS_dR_v2.pdf", replace 

erase g1.gph
erase g2.gph
erase g3.gph
erase g4.gph


// Robustness: Appendix. POOLING ALL OBSERVATIONS FOR WHICH SOLAR > 0 // 

est clear 

// QUADRATIC FUNCTION
eststo: qui areg ancillary_costs_1 solar_gen solar2 load load2 solar_load /// 
	i.TSO_id#i.hour i.TSO_id#i.day_of_week i.TSO_id#i.month i.TSO_id#i.year /// 
	i.hour#i.day_of_week i.hour#i.year ///
	i.day_of_week#i.month i.day_of_week#i.year, ///
	a(hour_month)	
  
gen dAS_dR_quadratic = -.8551619 + .0001846 * 2 * solar_gen + ///
	(-.0000246) * load 	
tabstat dAS_dR_quadratic, by(TSO) s(mean sd N) 


// CUBIC POLYNOMIAL
eststo: qui areg ancillary_costs_1 solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load /// 
	i.TSO_id#i.hour i.TSO_id#i.day_of_week i.TSO_id#i.month i.TSO_id#i.year /// 
	i.hour#i.day_of_week i.hour#i.year ///
	i.day_of_week#i.month i.day_of_week#i.year, ///
	a(hour_month)	

	
gen dAS_dR_cubic_pooled = -.5673467 + .0003943 * 2 * solar_gen + ///
	(-1.49e-08) * 3 * solar_gen^2  + (-.0000894) * load + ///
	 2.86e-09 * load^2 + (-8.50e-09) * 2 * solar_gen * load 	
tabstat dAS_dR_cubic_pooled, by(TSO) s(mean sd N) 	

esttab est*, se keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load)
esttab using "../results/table_AS_pooled_v2.tex", se replace ///
	keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load) scalars(r2 N)



*****************************************************************************
*****************************************************************************
// Repeat clustering for 5 TSOs (with TenneT split) // 

preserve
clear

save "data15min_cluster_TSO_TenneTSplit.dta", emptyok replace

use "data_15min", clear

expand 2 if TSO == "TenneT", gen(dup_)
gen  split = "TenneT_North" if TSO =="TenneT" & dup_ ==0 
replace split = "TenneT_South" if TSO =="TenneT" & dup_ ==1 
drop dup_

// From Supply_Demand_splitTenneT
local north_share .5892228
local south_share .4107772
gen load_split = cond(split == "TenneT_North", `north_share' * load, `south_share' * load)
replace load = load_split if TSO =="TenneT"
replace TSO = split if TSO =="TenneT" // Now celled TenneT_South and TenneT_North


egen TSO_id = group(TSO)
gen hour = hh(time_utc)
gen day_of_week = dow(dofc(time_utc))
gen month = month(dofc(time_utc))
gen year = year(dofc(time_utc))
egen hour_month = group(hour month)
egen hour_month_day_of_week = group(hour month day_of_week)	
gen date = dofc(time_utc)
format date %td

xtset TSO_id time_utc, delta(15 minutes)
*************
tempfile temp3 temp4
save `temp3'
*************

* (1) Build load clusters 
collapse load , by(TSO date hour)

// Clustering (k means) + cubic functional form. // 
egen date_indic = group(TSO date)
egen load_max = max(load), by(date_indic)

reshape wide load, i(date_indic) j(hour) 

tempfile t2
save `t2'

foreach t in "50Hertz" "Amprion" "TenneT_North" "TenneT_South" "TransnetBW" {
	use  `t2' , clear
	keep if TSO == "`t'"
	
	// Need to do robustness for k(4) and starting value 31787...
	cluster k load* load_max, k(3) name(kload_`t') s(kr(31787)) mea(abs) keepcen
	
	append using "data15min_cluster_TSO_TenneTSplit.dta"
	save "data15min_cluster_TSO_TenneTSplit.dta", replace
}

egen kload = rowtotal(kload_*)
drop if kload == 0
drop kload_*
tab kload TSO

reshape long load, i(date_indic) j(hour)


// SWAP LABELS FOR MEDIUM AND LOW DEMAND FOR 50Hertz SO THAT MARKERS ARE CONSISTENT ACROSS GRAPHS BELOW
gen kload_temp = 3 if kload == 2 & TSO == "50Hertz"
replace kload_temp = 2 if kload == 3 & TSO == "50Hertz"
replace kload_temp = 1 if kload == 1 & TSO == "50Hertz"
replace kload = kload_temp if TSO == "50Hertz"
drop kload_temp

save `temp4' // save before renaming as otherwise no merge

ren date date_day
ren hour date_hour
save "data15min_cluster_TSO_TenneTSplit.dta", replace
restore


**************************************************************************
* (2) Use original 15min data + add information on cluster
use `temp3' , clear

merge m:1 TSO date hour using `temp4', keepusing(kload)
keep if _merge == 3
drop _merge


replace scr_op_pos = abs(scr_op_pos) if scr_op_pos < 0 
// 1 error in data collection

// MAY 2021: WE CHANGED THE DEFINITION OF AS COSTS TO THIS:
gen ancillary_costs_1 = price_reBAP * (scr_op_pos - scr_op_neg + ///
	mr_op_pos - mr_op_neg)


// Need to update new solar generation for split // 
merge m:1 time_utc using "Tennet_solar_South.dta", keepusing(solar)
drop _merge
ren solar solar_BY

// UPDATE production for SOUTH / NORTH TENNET
replace solar_gen = solar_BY if TSO =="TenneT_South"
replace solar_gen = solar_gen - solar_BY if TSO =="TenneT_North"
replace solar_gen = 0 if solar_gen < 0 // very few instances when Bavaria reported more solar than TSO total
// Merging separately rather than using all from TenneT_split.dta" keeps the 15-min time intervals for solar

// KEEP ONLY OBSERVATIONS WITH POSITIVE OUTPUT
keep if solar_gen > 0				

** CUBIC 
gen solar2 = solar_gen^2
gen solar3 = solar_gen^3
gen load2 = load^2
gen load3 = load^3 
gen solar_load = solar_gen * load
gen solar_load2 = solar_gen * load^2
gen solar2_load = solar_gen^2 * load 


// BY CLUSTER AND TSO:

estimates clear
// CUBIC POLYNOMIAL
foreach t in "50Hertz" "Amprion" "TenneT_South" "TenneT_North" "TransnetBW" {
	forvalues c = 1/3 {
		eststo: areg ancillary_costs_1 solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load /// 
			i.hour#i.day_of_week i.hour#i.year ///
			i.day_of_week#i.month i.day_of_week#i.year if kload == `c' & TSO == "`t'", ///
			a(hour_month)	
	}
}	


esttab est* , se r2 keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load)
// esttab using "../draft/table_AS_clusters_v2_split.tex", replace ///
// se keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load) scalars(r2 N)

// UPDATED COEFFICIENTS
** NOTE: main DV is still at TenneT level, yet solar_gen and load are adjusted for TenneT_N and TenneT_S




// Derivatives of above regression model wrt solar
** COPY THESE dAS_dR numbers to TenneTSplit in version with all solar ** 
gen dAS_dR = 5.174 +  (-0.00223) * 2 * solar_gen + ///
	( -9.83e-09) * 3 * solar_gen^2  + (0.000380) * load + ///
	 ( -6.76e-08) * load^2 + ( 0.000000189) * 2 * solar_gen * load if kload == 1 & TSO == "50Hertz"
	
replace dAS_dR =  6.455 +  (-0.000391) * 2 * solar_gen + ///
	(-3.10e-08) * 3 * solar_gen^2  + (0.0000577) * load + ///
	 (-6.62e-08) * load^2 + (5.46e-08) * 2 * solar_gen * load if kload == 3 & TSO == "50Hertz"	
	
replace dAS_dR =  18.44 + (-0.00560) * 2 * solar_gen + ///
	(0.000000512) * 3 * solar_gen^2  + (-0.00260) * load + ///
	  0.000000159 * load^2 + (0.000000185) * 2 * solar_gen * load if kload == 2 & TSO == "50Hertz"			  

replace dAS_dR = 79.60 +  0.00101 * 2 * solar_gen + ///
	(0.000000101) * 3 * solar_gen^2  + (-0.00558) * load + ///
	 0.000000103 * load^2 + (-9.49e-08) * 2 * solar_gen * load if kload == 1 & TSO == "Amprion"
	
replace dAS_dR = -20.40 + (-0.000250) * 2 * solar_gen + ///
	(0.000000442) * 3 * solar_gen^2  + (0.00184) * load + ///
	 -2.90e-08 * load^2 + (-0.000000117) * 2 * solar_gen * load if kload == 2 & TSO == "Amprion"	
	
replace dAS_dR =  77.84 +  0.00388 * 2 * solar_gen + ///
	(-6.73e-09) * 3 * solar_gen^2  + (-0.00705) * load + ///
	  0.000000156 * load^2 + (-0.000000155) * 2 * solar_gen * load if kload == 3 & TSO == "Amprion"			  

replace dAS_dR =  467.9 + (-0.00219) * 2 * solar_gen + ///
	(2.35e-08) * 3 * solar_gen^2 + (-0.103) *load + ///
	(0.00000570) * load^2 + (0.000000246) *2 * solar_gen * load if kload == 1 & TSO == "TenneT_South"  

replace dAS_dR = -0.834 + (0.00221) * 2 * solar_gen + ///
	(2.13e-08) * 3 * solar_gen^2 + (-0.00168) *load + ///
	(0.000000290) * load^2 + (-0.000000367) *2 * solar_gen * load if kload == 2 & TSO == "TenneT_South"

replace dAS_dR = 66.74 + (0.00325) * 2 * solar_gen + ///
	(-0.000000162) * 3 * solar_gen^2 + (-0.0192) *load + ///
	(0.00000131) * load^2 + (-0.000000245) *2 * solar_gen * load if kload == 3 & TSO ==	"TenneT_South"

replace dAS_dR = 1052.4 + (-0.000161) * 2 * solar_gen + ///
	(0.00000105) * 3 * solar_gen^2 + ( -0.164) *load + ///
	(0.00000634) * load^2 + (-0.000000247) *2 * solar_gen * load if kload == 1 & TSO ==	 "TenneT_North" 
	  
replace dAS_dR =  9.278  + (-0.000574) * 2 * solar_gen + ///
	(-6.56e-08) * 3 * solar_gen^2 + (-0.00237) *load + ///
	(0.000000132) * load^2 + (0.000000174) *2 * solar_gen * load if kload == 2 & TSO ==  "TenneT_North" 

replace dAS_dR =  78.21 + (0.00233) * 2 * solar_gen + ///
	(-3.10e-08) * 3 * solar_gen^2 + (-0.0146) *load + ///
	(0.000000666) * load^2 + (-0.000000171) *2 * solar_gen * load if kload == 3 & TSO ==  "TenneT_North" 

replace dAS_dR = 130.8 +  0.00561 * 2 * solar_gen + ///
	(-1.23e-08) * 3 * solar_gen^2  + (-0.0329) * load + ///
	 0.00000202 * load^2 + (-0.000000616) * 2 * solar_gen * load if kload == 1 & TSO == "TransnetBW"
	
replace dAS_dR = 40.25 + 0.0160 * 2 * solar_gen + ///
	(0.00000132) * 3 * solar_gen^2  + (-0.0217) * load + ///
	 0.00000274 * load^2 + (-0.00000375) * 2 * solar_gen * load if kload == 2 & TSO == "TransnetBW"	
	
replace dAS_dR = 105.7 + 0.0130 * 2 * solar_gen + ///
	(-0.000000160) * 3 * solar_gen^2  + (-0.0314) * load + ///
	  0.00000223 * load^2 + (-0.00000150) * 2 * solar_gen * load if kload == 3 & TSO == "TransnetBW"			  




*****************************************************************************
*****************************************************************************
** ROBUSTNESS 2: Germany wide dispatch ** 

// Repeat clustering with pooled market: all TSOs together // 
clear
save "data15min_cluster_TSO_joint.dta", emptyok replace

use data_15min, clear


gen renewables = solar_gen + wind_gen // note wind_gen includes both onshore and offshore production

// ANCILLARY COSTS
// REMEMBER THAT *_OP_NEG ARE ALL IN ABSOLUTE VALUE IN THE DATASET

replace scr_op_pos = abs(scr_op_pos) if scr_op_pos < 0 
// 1 error in data collection

// UPDATE May2021
gen ancillary_costs_1 = price_reBAP * (scr_op_pos - scr_op_neg + ///
	mr_op_pos - mr_op_neg)


// SUMMARY STATS
su ancillary_costs_1,d 


egen TSO_id = group(TSO)
gen hour = hh(time_utc)
gen day_of_week = dow(dofc(time_utc))
gen month = month(dofc(time_utc))
gen year = year(dofc(time_utc))
egen hour_month = group(hour month)
egen hour_month_day_of_week = group(hour month day_of_week)	
gen date = dofc(time_utc)
format date %td

xtset TSO_id time_utc
*************************
tempfile temp1 temp2
save `temp1'
*************

*
* (1) Build load clusters 
collapse load , by(date hour)

// Clustering (k means) + cubic functional form. // 
egen date_indic = group(date)
egen load_max = max(load), by(date_indic)

reshape wide load, i(date_indic) j(hour) 

// Need to do robustness for k(4) and starting value 31787...
cluster k load* load_max, k(3) name(kload_`t') s(kr(31787)) mea(abs) keepcen


egen kload = rowtotal(kload_*)
drop if kload == 0
drop kload_*
tab kload

reshape long load, i(date_indic) j(hour)

// SWAP LABELS FOR MEDIUM AND LOW DEMAND SO THAT MARKERS ARE CONSISTENT ACROSS GRAPHS
recode kload (3=2) (2=3), gen(kload_temp)
replace kload = kload_temp
drop kload_temp

saveold "data15min_cluster_TSO_joint.dta", replace



* (2) Use original 15min data + add information on cluster
use `temp1' , clear

merge m:1 date hour using "data15min_cluster_TSO_joint.dta", keepusing(kload)
keep if _merge == 3
drop _merge


*****************************************************
** UPDATE: consider German zone
// CALCULATE AS PRICE AT GERMANY WIDE SOLAR PRODUCTION AND LOAD
replace solar_gen = solar_gen_DE
replace load = load_DE
*****************************************************

// KEEP ONLY OBSERVATIONS WITH POSITIVE OUTPUT
keep if solar_gen > 0				

** CUBIC 
gen solar2 = solar_gen^2
gen solar3 = solar_gen^3
gen load2 = load^2
gen load3 = load^3 
gen solar_load = solar_gen * load
gen solar_load2 = solar_gen * load^2
gen solar2_load = solar_gen^2 * load 


// SINGLE REGRESSSION REGARDING AS: 

estimates clear
// CUBIC POLYNOMIAL
forvalues c = 1/3 {
	eststo: areg ancillary_costs_1 solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load /// 
		i.TSO_id#i.hour i.TSO_id#i.day_of_week i.TSO_id#i.month i.TSO_id#i.year /// 
		i.hour#i.day_of_week i.hour#i.year ///
		i.day_of_week#i.month i.day_of_week#i.year if kload == `c', ///
		a(hour_month)	
}	


esttab est* , se r2 keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load)
// esttab using "../draft/table_AS_clusters_v2_part2.tex", replace ///
// 	se keep(solar_gen solar2 solar3 load load2 load3 solar_load solar_load2 solar2_load) scalars(r2 N)


// Derivatives of above regression model wrt solar
gen dAS_dR = 21.06 +  ( 0.000167) * 2 * solar_gen + ///
	(-4.69e-10) * 3 * solar_gen^2  + (-0.000718) * load + ///
	 ( 5.95e-09) * load^2 + (-2.25e-09) * 2 * solar_gen * load if kload == 1	

replace dAS_dR = -6.348 + (-0.000231) * 2 * solar_gen + ///
	(-3.21e-09) * 3 * solar_gen^2  + (0.000316) * load + ///
	  (-4.01e-09) * load^2 + (6.37e-09) * 2 * solar_gen * load if kload == 2 		  

replace dAS_dR = 0.273 + 0.0000246  * 2 * solar_gen + ///
	(3.16e-09) * 3 * solar_gen^2  + (-0.0000224) * load + ///
	   3.20e-10  * load^2 + (-1.47e-09) * 2 * solar_gen * load if kload == 3 
	
tabstat dAS_dR, by(kload) s(mean N)	

